/**
* Copyright (c) 2010, Regents of the University of Colorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.ftr.xml;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

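/**
 * Abstract reader for feature-template XML files: parses cutoffs and n-gram
 * feature templates, and leaves other kinds of features to subclasses.
 * <p><b>Last update:</b> 4/12/2011
 * @author Jinho D. Choi
 */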
abstract public class AbstractFtrXml
{
	/** Name of the root template element. */
	static protected final String TEMPLATE	= "feature_template";
	/** Name of the cutoff element. */
	static protected final String CUTOFF	= "cutoff";
	/** Name of the label-cutoff attribute of the cutoff element. */
	static protected final String LABEL		= "label";
	/** N-gram feature (element name, and n-gram cutoff attribute of the cutoff element) */
	static protected final String NGRAM		= "ngram";
	/** Extra feature */
	static protected final String EXTRA		= "extra";
	/** Number of tokens */
	static protected final String N			= "n";
	/** Cutoff (>= 0) */
	static protected final String C			= "c";
	/** Type (e.g., "pp", "ump") */
	static protected final String T			= "t";
	/** Discrete field (e.g., "f", "m", "p", "d"); token attributes are named "f0", "f1", ... */
	static protected final String F			= "f";
	/** "true" | "false" */
	static protected final String VISIBLE	= "visible"; 
	/** Field delimiter (e.g., l+1:f) */
	static protected final String DELIM_F	= ":";
	/** Relation delimiter (e.g., l_hd) */
	static protected final String DELIM_R	= "_";
	
	/** N-gram feature [type][templates]; types are ordered by their sorted names. */
	public FtrTemplate[][] a_ngram_templates;
	/** Label cutoff read from the cutoff element (0 if not specified). */
	public int n_cutoff_label;
	/** N-gram cutoff read from the cutoff element (0 if not specified). */
	public int n_cutoff_ngram;
	/** Extra-feature cutoff read from the cutoff element (0 if not specified). */
	public int n_cutoff_extra;
	
	/**
	 * Reads the feature template from a file.
	 * If the file cannot be opened, the stack trace is printed and the object is left uninitialized.
	 * The file stream opened here is always closed before the constructor returns.
	 * @param featureXml path to the feature template XML file
	 */
	public AbstractFtrXml(String featureXml)
	{
		InputStream fin = null;
		
		try
		{
			fin = new FileInputStream(featureXml);
			init(fin);
		}
		catch (FileNotFoundException e) {e.printStackTrace();}
		finally
		{
			// this constructor owns the stream, so it must release it
			if (fin != null)
			{
				try                           {fin.close();}
				catch (java.io.IOException e) {e.printStackTrace();}
			}
		}
	}
	
	/**
	 * Reads the feature template from an input stream.
	 * The stream is not closed here; it remains owned by the caller.
	 * @param fin stream containing the feature template XML
	 */
	public AbstractFtrXml(InputStream fin)
	{
		init(fin);
	}
	
	/**
	 * Parses the XML document and initializes cutoffs, n-gram templates, and other features (in that order).
	 * Any exception raised while parsing or initializing is printed and terminates the JVM with status 1.
	 * @param fin stream containing the feature template XML
	 */
	public void init(InputStream fin)
	{
		DocumentBuilderFactory dFactory = DocumentBuilderFactory.newInstance();
		
		try
		{
			DocumentBuilder builder = dFactory.newDocumentBuilder();
			Document        doc     = builder.parse(fin);
			
			initCutoffs (doc);
			initNgrams  (doc);
			initFeatures(doc);
		}
		catch (Exception e) {e.printStackTrace();System.exit(1);}
	}
	
	/**
	 * Initializes cutoffs from the first cutoff element.
	 * Cutoffs are left untouched if the document has no such element;
	 * a cutoff whose attribute is missing is set to 0.
	 */
	protected void initCutoffs(Document doc) throws Exception
	{
		NodeList eList = doc.getElementsByTagName(CUTOFF);
		if (eList.getLength() <= 0)	return;
		Element eCutoff = (Element)eList.item(0);
		
		n_cutoff_label = getIntAttribute(eCutoff, LABEL);
		n_cutoff_ngram = getIntAttribute(eCutoff, NGRAM);
		n_cutoff_extra = getIntAttribute(eCutoff, EXTRA);
	}
	
	/**
	 * Initializes {@link #a_ngram_templates} from all n-gram elements.
	 * Elements whose visible attribute is "false" are skipped.
	 * Templates are grouped by type; groups are ordered by sorted type name and
	 * templates within a group keep the order in which their elements were returned.
	 */
	protected void initNgrams(Document doc) throws Exception
	{
		NodeList eList = doc.getElementsByTagName(NGRAM);
		HashMap<String, ArrayList<FtrTemplate>> map = new HashMap<String, ArrayList<FtrTemplate>>();
		
		int i, n = eList.getLength();
		Element eFeature;
		
		for (i=0; i<n; i++)
		{
			eFeature = (Element)eList.item(i);
			if (eFeature.getAttribute(VISIBLE).trim().equals("false"))	continue;
			
			FtrTemplate ftr = getFtrTemplate(eFeature);
			ArrayList<FtrTemplate> list = map.get(ftr.type);
			
			if (list == null)
			{
				list = new ArrayList<FtrTemplate>();
				map.put(ftr.type, list);
			}
			
			list.add(ftr);
		}
		
		// sort the types so that the order of groups does not depend on hashing
		ArrayList<String> keys = new ArrayList<String>(map.keySet());
		Collections.sort(keys);
		
		n = keys.size();
		a_ngram_templates = new FtrTemplate[n][];
		
		for (i=0; i<n; i++)
		{
			ArrayList<FtrTemplate> list = map.get(keys.get(i));
			a_ngram_templates[i] = list.toArray(new FtrTemplate[list.size()]);
		}
	}
	
	/**
	 * Convert the element to {@link FtrTemplate}.
	 * The number of tokens is read from the "n" attribute, the cutoff from the optional "c" attribute
	 * (0 if missing), and the i-th token from the attribute "f" + i.
	 * The type is the trimmed "t" attribute if present; otherwise, the concatenation of all token fields.
	 */
	protected FtrTemplate getFtrTemplate(Element eFeature)
	{
		int nToken = Integer.parseInt(eFeature.getAttribute(N));
		int cutoff = getIntAttribute(eFeature, C);
		
		FtrTemplate   ftr   = new FtrTemplate(nToken, cutoff);
		StringBuilder build = new StringBuilder();

		int i;	String type;
		
		for (i=0; i<nToken; i++)
		{
			FtrToken tok = getFtrToken(eFeature.getAttribute(F + i));
			ftr.addFtrToken(i, tok);
			build.append(tok.field);
		}
		
		if (eFeature.hasAttribute(T))	type = eFeature.getAttribute(T).trim();
		else							type = build.toString();
		
		ftr.setType(type);
		return ftr;
	}
	
	/**
	 * Converts the string to {@link FtrToken}.
	 * The string has the form {@code <source>[<offset>][_<relation>]:<field>}, where the source is
	 * a single character and the offset is an integer with an optional sign.
	 * Structurally malformed strings (missing field, missing source, non-numeric offset) are reported
	 * via {@link #xmlError(String)}; if that method returns, an {@link IllegalArgumentException} is thrown.
	 * @param ftr (e.g., "l:f", "l+1:m", "l-1:p", "l0_hd:d")
	 */
	protected FtrToken getFtrToken(String ftr)
	{
		String[] aField = ftr.split(DELIM_F);	// {"l-1_hd", "p"}
		if (aField.length < 2)	throw invalidFeature(ftr, null);
		
		// split() drops trailing empty strings, so the array itself may be empty (e.g., "_")
		String[] aRelation = aField[0].split(DELIM_R);	// {"l-1", "hd"}
		if (aRelation.length == 0 || aRelation[0].length() == 0)	throw invalidFeature(ftr, null);
		
		String head   = aRelation[0];
		char   source = head.charAt(0);
		if (!validSource(source))	xmlError(ftr);
		
		int offset = 0;
		if (head.length() >= 2)
		{
			// skip an explicit '+' sign; a '-' sign is handled by parseInt
			int beginIndex = (head.charAt(1) == '+') ? 2 : 1;
			
			try
			{
				offset = Integer.parseInt(head.substring(beginIndex));
			}
			catch (NumberFormatException e) {throw invalidFeature(ftr, e);}
		}
		
		String relation = null;
		if (aRelation.length > 1)
		{
			relation = aRelation[1];
			if (!validRelation(relation))	xmlError(ftr);
		}
		
		String field = aField[1];
		if (!validField(field))	xmlError(ftr);

		return new FtrToken(source, offset, relation, field);
	}
	
	/** Prints system error and exits. */
	protected void xmlError(String error)
	{
		System.err.println("Invalid feature: "+error);
		System.exit(1);
	}
	
	/**
	 * Reports a malformed feature via {@link #xmlError(String)} and, in case that method returns
	 * (e.g., when overridden), builds the exception to be thrown by the caller.
	 * @param cause underlying exception, or null if there is none
	 */
	private IllegalArgumentException invalidFeature(String ftr, Throwable cause)
	{
		xmlError(ftr);
		return new IllegalArgumentException("Invalid feature: "+ftr, cause);
	}
	
	/** @return the integer value of the attribute, or 0 if the element does not have the attribute. */
	private static int getIntAttribute(Element element, String name)
	{
		return element.hasAttribute(name) ? Integer.parseInt(element.getAttribute(name)) : 0;
	}
	
	/** Initializes other kinds of features. */
	abstract protected void    initFeatures(Document doc) throws Exception;
	/** @return true if the character is a valid token source. */
	abstract protected boolean validSource(char source);
	/** @return true if the string is a valid relation. */
	abstract protected boolean validRelation(String relation);
	/** @return true if the string is a valid field. */
	abstract protected boolean validField(String field);
	
	/**
	 * Appends the template to the builder as an indented, self-closing XML element.
	 * @param build builder to append to
	 * @param type  element name
	 * @param ftr   template whose string form is used as the element's attributes
	 */
	protected void toStringAux(StringBuilder build, String type, FtrTemplate ftr)
	{
		build.append("    <");
		build.append(type);
		build.append(" ");
		build.append(ftr.toString());
		build.append("/>\n");	
	}
}